#!/usr/bin/env python
# -*- coding:utf-8 -*-

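# Goal: scrape the production-license records published by the drug
#       administration (NMPA) site and collect the detail-page content
#       of each listed enterprise.
# Approach: scrolling the listing page at http://scxk.nmpa.gov.cn:81/xk/
#       triggers a paginated AJAX POST request.
#       Form parameters: on, page, pageSize, productName, conditionType,
#       applyname, applysn.
#       The response body is a JSON document.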

import requests
import json
import io

def _post_json(url, headers, form, timeout=10):
    """POST ``form`` to ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint the request is sent to.
        headers: HTTP headers sent with the request.
        form: Dict of form fields sent as the request body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    # Without a timeout, requests waits forever on a stalled server.
    response = requests.post(url=url, headers=headers, data=form,
                             timeout=timeout)
    # Surface the HTTP status rather than an opaque JSON decoding error.
    response.raise_for_status()
    return response.json()


def main():
    """Fetch page 1 of the license listing, then each listed detail record.

    Prints, in order: the raw listing response, the extracted enterprise
    IDs, and the list of detail records.
    """
    # The listing page itself (http://scxk.nmpa.gov.cn:81/xk/) carries no
    # enterprise data; it is loaded afterwards as JSON through this AJAX
    # endpoint.
    list_url = "http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList"
    # A detail page such as
    #   http://scxk.nmpa.gov.cn:81/xk/itownet/portal/dzpz.jsp?id=ff83aff95c5541cdab5ca6e847514f88
    # loads its content by posting the id to this endpoint.
    detail_url = "http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById"

    # Send a browser User-Agent with every request (UA spoofing).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Mobile Safari/537.36'
    }

    list_form = {
        'on': 'true',
        'page': '1',
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }

    listing = _post_json(list_url, headers, list_form)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(listing)

    # Enterprise IDs from the listing; a missing or null 'list' yields no
    # IDs instead of a crash (the raw response was already printed above).
    id_list = [entry['ID'] for entry in listing.get('list') or []]
    print(id_list)

    # One detail record per enterprise, in listing order.
    all_data_list = [
        _post_json(detail_url, headers, {'id': enterprise_id})
        for enterprise_id in id_list
    ]
    print(all_data_list)


if __name__ == "__main__":
    main()